Regression trees example 1

Author

Murray Logan

Published

05/07/2025

1 Preparations

Load the necessary libraries

library(gbm) # for gradient boosted models
library(car)
library(dismo)
library(pdp)
library(ggfortify)
library(randomForest)
library(tidyverse)
library(gridExtra)
library(patchwork)
library(easystats)

2 Scenario

Loyn (1987) modelled the abundance of forest birds with six predictor variables (patch area, distance to nearest patch, distance to nearest larger patch, grazing intensity, altitude and years since the patch had been isolated).

Figure 1: Regent honeyeater
Table 1: Format of loyn.csv data file
ABUND DIST LDIST AREA GRAZE ALT YR.ISOL
.. .. .. .. .. .. ..
Table 2: Description of the variables in the loyn data file
ABUND Abundance of forest birds in patch- response variable
DIST Distance to nearest patch - predictor variable
LDIST Distance to nearest larger patch - predictor variable
AREA Size of the patch - predictor variable
GRAZE Grazing intensity (1 to 5, representing light to heavy) - predictor variable
ALT Altitude - predictor variable
YR.ISOL Number of years since the patch was isolated - predictor variable

The aim of the analysis is to investigate the effects of a range of predictors on the abundance of forest birds.

We have previously analysed these data in Example 4 and the Bayesian version (Example 4). On those occasions, we used a multiple linear model with scaled predictors (some of which were also log-transformed) against a lognormal distribution.

On this occassion we will take a different approach. We will use regression trees in order to explore which variables might be “important” drivers of bird abundances and the possible nature of any relationships and interactions. Such an analysis might help refine sensible candidate models to explore via linear modelling, yet might also serve as either an analysis in its own right

3 Read in the data

loyn <- read_csv("../data/loyn.csv", trim_ws = TRUE)
Rows: 56 Columns: 7
── Column specification ────────────────────────────────────────────────────────
Delimiter: ","
dbl (7): ABUND, AREA, YR.ISOL, DIST, LDIST, GRAZE, ALT

ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
glimpse(loyn)
Rows: 56
Columns: 7
$ ABUND   <dbl> 5.3, 2.0, 1.5, 17.1, 13.8, 14.1, 3.8, 2.2, 3.3, 3.0, 27.6, 1.8…
$ AREA    <dbl> 0.1, 0.5, 0.5, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 2.0, 2.0, 2.…
$ YR.ISOL <dbl> 1968, 1920, 1900, 1966, 1918, 1965, 1955, 1920, 1965, 1900, 19…
$ DIST    <dbl> 39, 234, 104, 66, 246, 234, 467, 284, 156, 311, 66, 93, 39, 40…
$ LDIST   <dbl> 39, 234, 311, 66, 246, 285, 467, 1829, 156, 571, 332, 93, 39, …
$ GRAZE   <dbl> 2, 5, 5, 3, 5, 3, 5, 5, 4, 5, 3, 5, 2, 1, 5, 5, 3, 3, 3, 2, 2,…
$ ALT     <dbl> 160, 60, 140, 160, 140, 130, 90, 60, 130, 130, 210, 160, 210, …
loyn |> glimpse()
Rows: 56
Columns: 7
$ ABUND   <dbl> 5.3, 2.0, 1.5, 17.1, 13.8, 14.1, 3.8, 2.2, 3.3, 3.0, 27.6, 1.8…
$ AREA    <dbl> 0.1, 0.5, 0.5, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 2.0, 2.0, 2.…
$ YR.ISOL <dbl> 1968, 1920, 1900, 1966, 1918, 1965, 1955, 1920, 1965, 1900, 19…
$ DIST    <dbl> 39, 234, 104, 66, 246, 234, 467, 284, 156, 311, 66, 93, 39, 40…
$ LDIST   <dbl> 39, 234, 311, 66, 246, 285, 467, 1829, 156, 571, 332, 93, 39, …
$ GRAZE   <dbl> 2, 5, 5, 3, 5, 3, 5, 5, 4, 5, 3, 5, 2, 1, 5, 5, 3, 3, 3, 2, 2,…
$ ALT     <dbl> 160, 60, 140, 160, 140, 130, 90, 60, 130, 130, 210, 160, 210, …
## Explore the first 6 rows of the data
loyn |> head()
# A tibble: 6 × 7
  ABUND  AREA YR.ISOL  DIST LDIST GRAZE   ALT
  <dbl> <dbl>   <dbl> <dbl> <dbl> <dbl> <dbl>
1   5.3   0.1    1968    39    39     2   160
2   2     0.5    1920   234   234     5    60
3   1.5   0.5    1900   104   311     5   140
4  17.1   1      1966    66    66     3   160
5  13.8   1      1918   246   246     5   140
6  14.1   1      1965   234   285     3   130
loyn |> str()
spc_tbl_ [56 × 7] (S3: spec_tbl_df/tbl_df/tbl/data.frame)
 $ ABUND  : num [1:56] 5.3 2 1.5 17.1 13.8 14.1 3.8 2.2 3.3 3 ...
 $ AREA   : num [1:56] 0.1 0.5 0.5 1 1 1 1 1 1 1 ...
 $ YR.ISOL: num [1:56] 1968 1920 1900 1966 1918 ...
 $ DIST   : num [1:56] 39 234 104 66 246 234 467 284 156 311 ...
 $ LDIST  : num [1:56] 39 234 311 66 246 ...
 $ GRAZE  : num [1:56] 2 5 5 3 5 3 5 5 4 5 ...
 $ ALT    : num [1:56] 160 60 140 160 140 130 90 60 130 130 ...
 - attr(*, "spec")=
  .. cols(
  ..   ABUND = col_double(),
  ..   AREA = col_double(),
  ..   YR.ISOL = col_double(),
  ..   DIST = col_double(),
  ..   LDIST = col_double(),
  ..   GRAZE = col_double(),
  ..   ALT = col_double()
  .. )
 - attr(*, "problems")=<externalptr> 
loyn |> datawizard::data_codebook()
loyn (56 rows and 7 variables, 7 shown)

ID | Name    | Type    | Missings |       Values |          N
---+---------+---------+----------+--------------+-----------
1  | ABUND   | numeric | 0 (0.0%) |  [1.5, 39.6] |         56
---+---------+---------+----------+--------------+-----------
2  | AREA    | numeric | 0 (0.0%) |  [0.1, 1771] |         56
---+---------+---------+----------+--------------+-----------
3  | YR.ISOL | numeric | 0 (0.0%) | [1890, 1976] |         56
---+---------+---------+----------+--------------+-----------
4  | DIST    | numeric | 0 (0.0%) |   [26, 1427] |         56
---+---------+---------+----------+--------------+-----------
5  | LDIST   | numeric | 0 (0.0%) |   [26, 4426] |         56
---+---------+---------+----------+--------------+-----------
6  | GRAZE   | numeric | 0 (0.0%) |            1 | 13 (23.2%)
   |         |         |          |            2 |  8 (14.3%)
   |         |         |          |            3 | 15 (26.8%)
   |         |         |          |            4 |  7 (12.5%)
   |         |         |          |            5 | 13 (23.2%)
---+---------+---------+----------+--------------+-----------
7  | ALT     | numeric | 0 (0.0%) |    [60, 260] |         56
-------------------------------------------------------------

4 Exploratory data analysis

The GRAZE predictor represents grazing intensity. Note, this was essentially a categorical variable with levels 1 (no grazing) through to 5 (intense grazing). Although we could attempt to model this as a continuous predictor, such an approach would assume that this is a linear scale in which the interval between each successive level is the same. That is, the difference between level 1 and 2 is the same as the difference between 4 and 5 etc. Since, these were categories, such spacing is not guaranteed. It might therefore be better to model this variable as a categorical variable.

Finally, even in the absence of issues relating to the range of each predictor within each level of GRAZE, it might still be advantageous to centre each predictor. Centering will provide computational advantages and also ensure that the intercept has a more useful meaning.

loyn <- loyn |> mutate(fGRAZE = factor(GRAZE))

Unlike linear models, regression trees make fewer assumptions about the data. For example, normality (distribution), homogeneity of variable (dispersion) and linearity are no longer assumed. Rather than optimising against a likelihood, regression trees use a loss function.

Nevertheless, it is always insightful to explore the data prior to performing any analyses.

For regression trees, it is also important to consider whether you want to impose monotonic relationships. Monotonic relationships are those that only advance either positively or negatively. Regression trees are inherently non-linear and have the tendency to overfit to minor perturbations in the training data. In situations where such undulations would lack ecological reasoning, we can specify that a specific partial trend must be monotonic.

A scatterplot matrix plots each variable against each other. It is useful to provide boxplots in the diagonals to help explore the distributions.

scatterplotMatrix(~ ABUND + DIST + LDIST + AREA + fGRAZE + ALT + YR.ISOL,
  data = loyn,
  diagonal = list(method = "boxplot")
)

Conclusions:

  • on the top row, ABUND is on the y-axis of each plot
  • the second plot from the top left has DIST on the x-axis
  • it is clear from looking at the boxplots on the diagonals that some of the potential predictors (DIST, LDIST and AREA) are not normally distributed.
  • it might be worth log-transforming these variables. Not only might this help normalise those predictors, it might also improve the linearity between the response and those predictors.
  • we will reserve all other judgements of assumptions until we explore these log-transformations

5 Fit the model

loyn.gbm <- gbm(ABUND ~ DIST + LDIST + AREA + fGRAZE + ALT + YR.ISOL,
  data = loyn,
  distribution = "gaussian",
  var.monotone = c(0, 0, 1, 0, 1, 1),
  n.trees = 10000,
  interaction.depth = 5,
  bag.fraction = 0.5,
  shrinkage = 0.01,
  train.fraction = 1,
  n.minobsinnode = 2,
  cv.folds = 3
)

We will now determine the optimum number of trees estimated to be required in order to achieve a balance between bias (biased towards the exact observations) and precision (variability in estimates). Ideally, the optimum number of trees should be close to 1000. If it is much less (as in this case), it could imply that the tree learned too quickly. On the other hand, if the optimum number of trees is very close to the total number of fitted trees, then it suggests that the optimum may not actually have occured yet and that more trees should be used (or a faster learning rate).

(best.iter <- gbm.perf(loyn.gbm, method = "OOB"))
OOB generally underestimates the optimal number of iterations although predictive performance is reasonably competitive. Using cv_folds>1 when calling gbm usually results in improved predictive performance.

[1] 217
attr(,"smoother")
Call:
loess(formula = object$oobag.improve ~ x, enp.target = min(max(4, 
    length(x)/10), 50))

Number of Observations: 10000 
Equivalent Number of Parameters: 39.99 
Residual Standard Error: 0.04611 
(best.iter <- gbm.perf(loyn.gbm, method = "cv"))

[1] 255

Conclusions:

  • both out-of-bag and cross validation suggest that the total number of trees greatly exceeds the number necessary to achieve the optimum balance between bias and precision.
  • the best iterations for either method is less than 1000 suggesting that the learning rate was too fast
  • it might be worth decreasing the learning rate.
  • we can probably also reduce the total number of trees (so that it runs a little quicker)
loyn.gbm <- gbm(ABUND ~ DIST + LDIST + AREA + fGRAZE + ALT + YR.ISOL,
  data = loyn,
  distribution = "gaussian",
  var.monotone = c(0, 0, 1, 0, 1, 1),
  n.trees = 5000,
  interaction.depth = 7,
  bag.fraction = 0.5,
  shrinkage = 0.001,
  train.fraction = 1,
  n.minobsinnode = 2,
  cv.folds = 3
)
(best.iter <- gbm.perf(loyn.gbm, method = "OOB"))
OOB generally underestimates the optimal number of iterations although predictive performance is reasonably competitive. Using cv_folds>1 when calling gbm usually results in improved predictive performance.

[1] 1537
attr(,"smoother")
Call:
loess(formula = object$oobag.improve ~ x, enp.target = min(max(4, 
    length(x)/10), 50))

Number of Observations: 5000 
Equivalent Number of Parameters: 39.99 
Residual Standard Error: 0.009095 
(best.iter <- gbm.perf(loyn.gbm, method = "cv"))

[1] 1976

Conclusions:

  • that is better

6 Explore relative influence

If a predictor is an important driver of the patterns in the response, then many of the tree splits should feature this predictor. It thus follows that the number of proportion of total splits that features each predictor will be a measure of the relative influence of each of the predictors.

summary(loyn.gbm, n.trees = best.iter)

            var   rel.inf
AREA       AREA 46.973901
fGRAZE   fGRAZE 28.637978
DIST       DIST  8.117824
ALT         ALT  6.766625
LDIST     LDIST  6.137227
YR.ISOL YR.ISOL  3.366445

Conclusions:

  • shell weight is overwelmingly the most influential predictor.
  • we can also ascribe a kind of signficance to the influence values by determining which of them are greater than \(100/p\) where \(p\) is the number of predictors. The logic here is that if all predictors were equally influential, then they would all have a relative influence of \(100/p\). Hence, any predictors that have a relative influence greater than this number must be explaining more than its share (and more than pure chance) and predictors with relative influence lower are explaining less than chance (and therefore not significantly influential).

7 Explore partial effects

attr(loyn.gbm$Terms, "term.labels")
[1] "DIST"    "LDIST"   "AREA"    "fGRAZE"  "ALT"     "YR.ISOL"
plot(loyn.gbm, 3, n.tree = best.iter)

plot(loyn.gbm, 3, n.tree = best.iter, log = "x")

loyn.gbm |>
  pdp::partial(
    pred.var = "AREA",
    n.trees = best.iter,
    recursive = FALSE,
    inv.link = I
  ) |>
  autoplot()

loyn.gbm |>
  pdp::partial(
    pred.var = "AREA",
    n.trees = best.iter,
    recursive = FALSE,
    inv.link = I
  ) |>
  autoplot() +
  scale_x_log10()

Lets use a log x scale so that we can have greater granularity over the small patch areas. To do this, it is best to also create a custom prediction grid that is evenly spaced on the log scale.

newdata <- with(loyn, data.frame(lAREA = seq(min(log(AREA)), max(log(AREA)), len = 100))) |>
  mutate(AREA = exp(lAREA)) |>
  dplyr::select(-lAREA)

loyn.gbm |>
  pdp::partial(
    pred.var = "AREA",
    pred.grid = newdata,
    n.trees = best.iter,
    recursive = FALSE,
    inv.link = I
  ) |>
  autoplot() +
  scale_x_log10()

Recursive indicates that a weighted tree traversal method described by Friedman 2001 (which is very fast) should be used (only works for gbm). Otherwise a slower brute force method is used. If want to back transform - need to use brute force.

nms <- attr(loyn.gbm$Terms, "term.labels")
p <- vector("list", length(nms))
names(p) <- nms
for (nm in nms) {
  print(nm)
  p[[nm]] <- loyn.gbm |>
    pdp::partial(
      pred.var = nm,
      n.trees = best.iter,
      inv.link = I,
      recursive = FALSE,
      type = "regression"
    ) |>
    autoplot() + ylim(0, 30)
}
[1] "DIST"
[1] "LDIST"
[1] "AREA"
[1] "fGRAZE"
[1] "ALT"
[1] "YR.ISOL"
patchwork::wrap_plots(p)

patchwork::wrap_plots(p) & theme_classic()

pp <- map(
  .x = nms,
  .f = ~
    loyn.gbm |>
      pdp::partial(
        pred.var = .x,
        n.trees = best.iter,
        inv.link = I,
        recursive = FALSE,
        type = "regression"
      ) |>
      autoplot() + ylim(0, 30)
)

patchwork::wrap_plots(pp) & theme_classic()

We might also want to explore interactions…

newdata <- with(
  loyn,
  expand.grid(
    lAREA = seq(min(log(AREA)), max(log(AREA)), len = 100),
    fGRAZE = levels(fGRAZE)
  )
) |>
  mutate(
    AREA = exp(lAREA),
    fGRAZE = factor(fGRAZE)
  ) |>
  dplyr::select(AREA, fGRAZE)

head(newdata)
       AREA fGRAZE
1 0.1000000      1
2 0.1103853      1
3 0.1218492      1
4 0.1345036      1
5 0.1484722      1
6 0.1638915      1
pdp::partial(loyn.gbm,
  pred.var = c("AREA", "fGRAZE"),
  pred.grid = newdata,
  n.trees = best.iter,
  recursive = TRUE
) |>
  autoplot() +
  scale_x_log10()

## g1 = loyn.gbm %>% pdp::partial(pred.var='SHELL_WEIGHT', n.trees=best.iter,
##                             recursive=FALSE,inv.link=exp) %>%
##     autoplot
## g2 = loyn.gbm %>% pdp::partial(pred.var='HEIGHT', n.trees=best.iter,
##                             recursive=FALSE,inv.link=exp) %>%
##     autoplot


## g1 + g2


newdata <- with(
  loyn,
  expand.grid(
    lAREA = seq(min(log(AREA)), max(log(AREA)), len = 100),
    fGRAZE = levels(fGRAZE),
    DIST = NA, LDIST = NA, ALT = NA, YR.ISOL = NA
  )
) |>
  mutate(
    AREA = exp(lAREA),
    fGRAZE = factor(fGRAZE)
  ) |>
  dplyr::select(DIST, LDIST, AREA, fGRAZE, ALT, YR.ISOL)
loyn.fit <- newdata |>
  mutate(fit = predict(loyn.gbm,
    newdata = newdata,
    n.trees = best.iter
  ))

loyn.fit |>
  ggplot(aes(y = fit, x = AREA, colour = fGRAZE)) +
  geom_line() +
  scale_x_log10()

The following no longer works

loyn.fit <-
  pdp::partial(loyn.gbm,
    pred.var = c("AREA", "fGRAZE"),
    pred.grid = newdata,
    n.trees = best.iter,
    recursive = TRUE
  ) |>
  as.data.frame()
head(loyn.fit)
  AREA fGRAZE      yhat NA NA NA       NA
1   NA     NA 0.1000000  1 NA NA 18.96636
2   NA     NA 0.1000000  2 NA NA 18.96636
3   NA     NA 0.1000000  3 NA NA 18.96636
4   NA     NA 0.1000000  4 NA NA 18.96636
5   NA     NA 0.1000000  5 NA NA 18.96636
6   NA     NA 0.1103853  1 NA NA 18.96636
loyn.fit |> ggplot(aes(y = yhat, x = AREA)) +
  geom_line(aes(colour = fGRAZE)) +
  scale_x_log10() +
  theme_classic()
Warning: Removed 500 rows containing missing values or values outside the scale range
(`geom_line()`).

8 Explore interactions

What were the strong main effects:

  • AREA
  • fGRAZE
  • YR.ISOL
  • ALT
attr(loyn.gbm$Terms, "term.labels")
[1] "DIST"    "LDIST"   "AREA"    "fGRAZE"  "ALT"     "YR.ISOL"
interact.gbm(loyn.gbm, loyn, c(3, 4), n.tree = best.iter)
[1] 0.2388035
interact.gbm(loyn.gbm, loyn, c(3, 4, 6), n.tree = best.iter)
[1] 0.00539512
terms <- attr(loyn.gbm$Terms, "term.labels")
loyn.int <- NULL
for (i in 1:(length(terms) - 1)) {
  for (j in (i + 1):length(terms)) {
    print(paste("i=", i, " Name = ", terms[i]))
    print(paste("j=", j, " Name = ", terms[j]))
    loyn.int <- rbind(
      loyn.int,
      data.frame(
        Var1 = terms[i], Var2 = terms[j],
        "H.stat" = interact.gbm(loyn.gbm, loyn, c(i, j),
          n.tree = best.iter
        )
      )
    )
  }
}
[1] "i= 1  Name =  DIST"
[1] "j= 2  Name =  LDIST"
[1] "i= 1  Name =  DIST"
[1] "j= 3  Name =  AREA"
[1] "i= 1  Name =  DIST"
[1] "j= 4  Name =  fGRAZE"
[1] "i= 1  Name =  DIST"
[1] "j= 5  Name =  ALT"
[1] "i= 1  Name =  DIST"
[1] "j= 6  Name =  YR.ISOL"
[1] "i= 2  Name =  LDIST"
[1] "j= 3  Name =  AREA"
[1] "i= 2  Name =  LDIST"
[1] "j= 4  Name =  fGRAZE"
[1] "i= 2  Name =  LDIST"
[1] "j= 5  Name =  ALT"
[1] "i= 2  Name =  LDIST"
[1] "j= 6  Name =  YR.ISOL"
[1] "i= 3  Name =  AREA"
[1] "j= 4  Name =  fGRAZE"
[1] "i= 3  Name =  AREA"
[1] "j= 5  Name =  ALT"
[1] "i= 3  Name =  AREA"
[1] "j= 6  Name =  YR.ISOL"
[1] "i= 4  Name =  fGRAZE"
[1] "j= 5  Name =  ALT"
[1] "i= 4  Name =  fGRAZE"
[1] "j= 6  Name =  YR.ISOL"
[1] "i= 5  Name =  ALT"
[1] "j= 6  Name =  YR.ISOL"
loyn.int |> arrange(-H.stat)
     Var1    Var2     H.stat
1    AREA  fGRAZE 0.23880355
2    DIST   LDIST 0.13935695
3    DIST  fGRAZE 0.11937916
4    AREA     ALT 0.08348306
5  fGRAZE     ALT 0.07926973
6    DIST     ALT 0.07878776
7    DIST YR.ISOL 0.05437523
8   LDIST     ALT 0.05151848
9    DIST    AREA 0.04867882
10  LDIST  fGRAZE 0.04635137
11  LDIST YR.ISOL 0.04188625
12 fGRAZE YR.ISOL 0.04086562
13  LDIST    AREA 0.03979671
14    ALT YR.ISOL 0.03527809
15   AREA YR.ISOL 0.02372928

9 Tuning

The takes a long time - do over a break

head(loyn)
# A tibble: 6 × 8
  ABUND  AREA YR.ISOL  DIST LDIST GRAZE   ALT fGRAZE
  <dbl> <dbl>   <dbl> <dbl> <dbl> <dbl> <dbl> <fct> 
1   5.3   0.1    1968    39    39     2   160 2     
2   2     0.5    1920   234   234     5    60 5     
3   1.5   0.5    1900   104   311     5   140 5     
4  17.1   1      1966    66    66     3   160 3     
5  13.8   1      1918   246   246     5   140 5     
6  14.1   1      1965   234   285     3   130 3     
set.seed(123)
loyn.gbm1 <- gbm.step(
  data = loyn |> as.data.frame(),
  gbm.x = c(2:5, 7, 8),
  gbm.y = 1,
  tree.complexity = 7,
  var.monotone = c(1, 1, 1, 1, 1, 0),
  learning.rate = 0.001,
  bag.fraction = 0.5,
  family = "gaussian",
  n.trees = 1000,
  step.size = 1000,
  max.trees = 10000
)

 
 GBM STEP - version 2.9 
 
Performing cross-validation optimisation of a boosted regression tree model 
for ABUND and using a family of gaussian 
Using 56 observations and 6 predictors 
creating 10 initial models of 1000 trees 

 folds are unstratified 
total mean deviance =  113.1773 
tolerance is fixed at  0.1132 
ntrees resid. dev. 
1000    70.0734 
now adding trees... 
2000   52.6438 
3000   45.6783 
4000   42.8919 
5000   42.2368 
6000   42.3832 
7000   42.8948 
8000   43.3697 
9000   44.0321 
10000   44.5394 

fitting final gbm model with a fixed number of 5000 trees for ABUND

mean total deviance = 113.177 
mean residual deviance = 26.588 
 
estimated cv deviance = 42.237 ; se = 6.163 
 
training data correlation = 0.876 
cv correlation =  0.78 ; se = 0.041 
 
elapsed time -  0.03 minutes 

 ########### warning ########## 
 
maximum tree limit reached - results may not be optimal 
  - refit with faster learning rate or increase maximum number of trees 
summary(loyn.gbm1)

            var    rel.inf
AREA       AREA 51.3441970
fGRAZE   fGRAZE 36.0758288
ALT         ALT  9.0416938
YR.ISOL YR.ISOL  2.1921059
LDIST     LDIST  0.8012732
DIST       DIST  0.5449014
gbm.plot(loyn.gbm1, n.plots = 7, write.title = FALSE)
Warning in gbm.plot(loyn.gbm1, n.plots = 7, write.title = FALSE): reducing no
of plotted predictors to maximum available (6)

gbm.plot.fits(loyn.gbm1)

find.int <- gbm.interactions(loyn.gbm1)
gbm.interactions - version 2.9
Cross tabulating interactions for gbm model with 6 predictors
1 2 3 4 5 
summary(find.int)
             Length Class      Mode   
rank.list     5     data.frame list   
interactions 36     -none-     numeric
gbm.call     21     -none-     list   
find.int$rank.list
  var1.index var1.names var2.index var2.names int.size
1          7       <NA>          0                   0
2          6     fGRAZE          5        ALT        0
gbm.perspec(loyn.gbm1, x = 1, y = 3)
maximum value = 26.45

10 Bootstrapping

nBoot <- 10
loyn.pred <- with(
  loyn,
  expand.grid(
    lAREA = seq(min(log(AREA)), max(log(AREA)), len = 100),
    fGRAZE = levels(fGRAZE),
    DIST = NA,
    LDIST = NA,
    ALT = NA,
    YR.ISOL = NA
  )
) |>
  mutate(
    fGRAZE = factor(fGRAZE),
    AREA = exp(lAREA)
  ) |>
  dplyr::select(AREA, lAREA, fGRAZE, DIST, LDIST, ALT, YR.ISOL)

loyn.list <- vector("list", nBoot)
## loyn.list
loyn.sum <- vector("list", nBoot)
for (i in 1:nBoot) {
  print(paste0("Boot number: ", i))
  ## Create random set
  loyn.rnd <- loyn |>
    sample_n(size = n(), replace = TRUE)
  ## Fit the trees
  loyn.gbm <- gbm(ABUND ~ AREA + fGRAZE + DIST + LDIST + ALT + YR.ISOL,
    data = loyn.rnd,
    distribution = "gaussian",
    var.monotone = c(1, 0, 1, 1, 1, 1),
    n.trees = 5000,
    interaction.depth = 7,
    bag.fraction = 0.5,
    shrinkage = 0.001,
    train.fraction = 1,
    n.minobsinnode = 2,
    cv.folds = 3
  )
  ## Determine the best number of trees
  (best.iter <- gbm.perf(loyn.gbm, method = "cv"))
  ## predict based on shell weight
  fit <- predict(loyn.gbm, newdata = loyn.pred, n.trees = best.iter)
  loyn.list[[i]] <- data.frame(loyn.pred, Boot = i, Fit = fit)
  ## relative influence
  loyn.sum[[i]] <- summary(loyn.gbm, n.trees = best.iter)
}
[1] "Boot number: 1"

[1] "Boot number: 2"

[1] "Boot number: 3"

[1] "Boot number: 4"

[1] "Boot number: 5"

[1] "Boot number: 6"

[1] "Boot number: 7"

[1] "Boot number: 8"

[1] "Boot number: 9"

[1] "Boot number: 10"

loyn.fit <- do.call("rbind", loyn.list)
loyn.fit <- loyn.fit |>
  group_by(AREA, fGRAZE) |>
  ## summarise(Median = median(Fit),
  ##           Lower = quantile(Fit, p=0.025),
  ##           Upper = quantile(Fit, p=0.975))
  ggdist::median_hdci(Fit)
g1 <- loyn.fit |> ggplot(aes(y = Fit, x = AREA, fill = fGRAZE, color = fGRAZE)) +
  geom_ribbon(aes(ymin = .lower, ymax = .upper), alpha = 0.3, color = NA) +
  geom_line() +
  scale_fill_viridis_d() +
  scale_colour_viridis_d() +
  scale_x_log10() +
  theme_classic()

loyn.inf <- do.call("rbind", loyn.sum)
loyn.inf <- loyn.inf |>
  group_by(var) |>
  ggdist::median_hdci(rel.inf)

g2 <- loyn.inf |>
  arrange(rel.inf) |>
  mutate(var = factor(var, levels = unique(var))) |>
  ggplot(aes(y = var, x = rel.inf)) +
  geom_vline(xintercept = 12.5, linetype = "dashed") +
  geom_pointrange(aes(xmin = .lower, xmax = .upper)) +
  theme_classic()

g2 + patchwork::inset_element(g1, left = 0.3, bottom = 0.01, right = 1, top = 0.7)

g1 + patchwork::inset_element(g2, left = 0.5, bottom = 0.01, right = 1, top = 0.4)

set.seed(123)

nBoot <- 10

loyn.boot <-
  tibble(Boot = 1:nBoot) |>
  ## Create bootstrapp data sets
  mutate(data = map(
    .x = Boot,
    .f = ~ loyn |>
      sample_n(size = n(), replace = TRUE)
  )) |>
  ## Fit the trees
  mutate(gbm = map2(
    .x = data,
    .y = Boot,
    .f = ~ {
      print(paste("Boot=", .y))
      loyn.gbm <- gbm(ABUND ~ AREA + fGRAZE + DIST + LDIST + ALT + YR.ISOL,
        data = .x,
        distribution = "gaussian",
        var.monotone = c(1, 0, 1, 1, 1, 1),
        n.trees = 5000,
        interaction.depth = 7,
        bag.fraction = 0.5,
        shrinkage = 0.001,
        train.fraction = 1,
        n.minobsinnode = 2,
        cv.folds = 3
      )
    }
  )) |>
  ## Determine the best number of trees
  mutate(best.iter = map(
    .x = gbm,
    .f = ~ gbm.perf(.x, method = "cv")
  )) |>
  ## predict
  mutate(fit = map2(
    .x = gbm,
    .y = best.iter,
    .f = ~ newdata |>
      mutate(fit = predict(.x, newdata = loyn.pred, n.trees = .y))
  )) |>
  ## relative influence
  mutate(infl = map2(
    .x = gbm,
    .y = best.iter,
    .f = ~ summary(.x, n.trees = .y)
  ))
[1] "Boot= 1"
[1] "Boot= 2"
[1] "Boot= 3"
[1] "Boot= 4"
[1] "Boot= 5"
[1] "Boot= 6"
[1] "Boot= 7"
[1] "Boot= 8"
[1] "Boot= 9"
[1] "Boot= 10"

loyn.boot[1, "data"][[1]][[1]]
# A tibble: 56 × 8
   ABUND  AREA YR.ISOL  DIST LDIST GRAZE   ALT fGRAZE
   <dbl> <dbl>   <dbl> <dbl> <dbl> <dbl> <dbl> <fct> 
 1   6.8  10      1962   337   519     3   110 3     
 2   8     2      1900   259   259     5   120 5     
 3  34.4  96      1976    39   519     2   175 2     
 4  14.6   2      1972   402   402     1   210 1     
 5   1.5   0.5    1900   104   311     5   140 5     
 6  24.9  29      1965   545   770     3   130 3     
 7  31    57      1963   467   467     1   165 1     
 8  33   144      1940   146   319     1   190 1     
 9  29    32      1974   208   208     1   190 1     
10  11.5  17      1920   389  2595     5   100 5     
# ℹ 46 more rows
loyn.boot[2, "data"][[1]][[1]]
# A tibble: 56 × 8
   ABUND  AREA YR.ISOL  DIST LDIST GRAZE   ALT fGRAZE
   <dbl> <dbl>   <dbl> <dbl> <dbl> <dbl> <dbl> <fct> 
 1   2.2     1    1920   284  1829     5    60 5     
 2   1.8     2    1890    93    93     5   160 5     
 3  21.2     2    1973    39    39     2   210 2     
 4   2.9     3    1965    26    26     3   140 3     
 5  27.8    12    1963   159   159     4   110 4     
 6  16.3     7    1965   285   882     3   130 3     
 7  19.5     6    1890    93   226     3   170 3     
 8  26      18    1966    40  3188     2   190 2     
 9  24.4     4    1973   234   519     2   220 2     
10   8       2    1900   259   259     5   120 5     
# ℹ 46 more rows
loyn.boot[1, "fit"][[1]][[1]]
    DIST LDIST         AREA fGRAZE ALT YR.ISOL       fit
1     NA    NA    0.1000000      1  NA      NA  9.390859
2     NA    NA    0.1103853      1  NA      NA  9.390859
3     NA    NA    0.1218492      1  NA      NA  9.390859
4     NA    NA    0.1345036      1  NA      NA  9.390859
5     NA    NA    0.1484722      1  NA      NA  9.390859
6     NA    NA    0.1638915      1  NA      NA  9.390859
7     NA    NA    0.1809122      1  NA      NA  9.390859
8     NA    NA    0.1997005      1  NA      NA  9.390859
9     NA    NA    0.2204400      1  NA      NA  9.390859
10    NA    NA    0.2433334      1  NA      NA  9.390859
11    NA    NA    0.2686043      1  NA      NA  9.390859
12    NA    NA    0.2964997      1  NA      NA  9.390859
13    NA    NA    0.3272921      1  NA      NA  9.390859
14    NA    NA    0.3612825      1  NA      NA  9.390859
15    NA    NA    0.3988028      1  NA      NA  9.390859
16    NA    NA    0.4402197      1  NA      NA  9.390859
17    NA    NA    0.4859379      1  NA      NA  9.390859
18    NA    NA    0.5364041      1  NA      NA  9.390859
19    NA    NA    0.5921113      1  NA      NA  9.390859
20    NA    NA    0.6536040      1  NA      NA  9.390859
21    NA    NA    0.7214828      1  NA      NA  9.390859
22    NA    NA    0.7964110      1  NA      NA  9.390859
23    NA    NA    0.8791208      1  NA      NA  9.390859
24    NA    NA    0.9704203      1  NA      NA  9.390859
25    NA    NA    1.0712015      1  NA      NA  9.390859
26    NA    NA    1.1824491      1  NA      NA  9.390859
27    NA    NA    1.3052502      1  NA      NA  9.390859
28    NA    NA    1.4408045      1  NA      NA  9.390859
29    NA    NA    1.5904366      1  NA      NA 10.700983
30    NA    NA    1.7556084      1  NA      NA 10.700983
31    NA    NA    1.9379339      1  NA      NA 10.700983
32    NA    NA    2.1391944      1  NA      NA 10.770800
33    NA    NA    2.3613565      1  NA      NA 10.770800
34    NA    NA    2.6065908      1  NA      NA 15.826785
35    NA    NA    2.8772934      1  NA      NA 15.826785
36    NA    NA    3.1761094      1  NA      NA 17.202779
37    NA    NA    3.5059583      1  NA      NA 17.501642
38    NA    NA    3.8700631      1  NA      NA 17.501642
39    NA    NA    4.2719813      1  NA      NA 18.004036
40    NA    NA    4.7156400      1  NA      NA 18.018450
41    NA    NA    5.2053741      1  NA      NA 18.064443
42    NA    NA    5.7459685      1  NA      NA 18.203466
43    NA    NA    6.3427054      1  NA      NA 18.203466
44    NA    NA    7.0014153      1  NA      NA 18.207380
45    NA    NA    7.7285343      1  NA      NA 18.652771
46    NA    NA    8.5311669      1  NA      NA 18.652771
47    NA    NA    9.4171554      1  NA      NA 18.742123
48    NA    NA   10.3951565      1  NA      NA 18.974916
49    NA    NA   11.4747262      1  NA      NA 20.393352
50    NA    NA   12.6664126      1  NA      NA 21.257992
51    NA    NA   13.9818594      1  NA      NA 21.559807
52    NA    NA   15.4339194      1  NA      NA 23.204052
53    NA    NA   17.0367805      1  NA      NA 23.622406
54    NA    NA   18.8061037      1  NA      NA 24.602910
55    NA    NA   20.7591766      1  NA      NA 24.631676
56    NA    NA   22.9150824      1  NA      NA 24.839198
57    NA    NA   25.2948857      1  NA      NA 25.033974
58    NA    NA   27.9218391      1  NA      NA 25.156057
59    NA    NA   30.8216098      1  NA      NA 27.012538
60    NA    NA   34.0225309      1  NA      NA 27.071318
61    NA    NA   37.5558777      1  NA      NA 27.819910
62    NA    NA   41.4561736      1  NA      NA 27.819910
63    NA    NA   45.7615276      1  NA      NA 28.451458
64    NA    NA   50.5140060      1  NA      NA 28.704582
65    NA    NA   55.7600443      1  NA      NA 28.704582
66    NA    NA   61.5509002      1  NA      NA 28.708993
67    NA    NA   67.9431546      1  NA      NA 29.131892
68    NA    NA   74.9992648      1  NA      NA 29.322478
69    NA    NA   82.7881742      1  NA      NA 29.381832
70    NA    NA   91.3859863      1  NA      NA 29.423220
71    NA    NA  100.8767082      1  NA      NA 29.424055
72    NA    NA  111.3530714      1  NA      NA 29.424055
73    NA    NA  122.9174379      1  NA      NA 29.506052
74    NA    NA  135.6828001      1  NA      NA 29.534548
75    NA    NA  149.7738854      1  NA      NA 29.683823
76    NA    NA  165.3283741      1  NA      NA 29.683823
77    NA    NA  182.4982454      1  NA      NA 29.683823
78    NA    NA  201.4512618      1  NA      NA 29.683823
79    NA    NA  222.3726086      1  NA      NA 29.683823
80    NA    NA  245.4667029      1  NA      NA 29.683823
81    NA    NA  270.9591915      1  NA      NA 29.683823
82    NA    NA  299.0991552      1  NA      NA 29.683823
83    NA    NA  330.1615426      1  NA      NA 29.683823
84    NA    NA  364.4498566      1  NA      NA 29.683823
85    NA    NA  402.2991197      1  NA      NA 29.683823
86    NA    NA  444.0791477      1  NA      NA 29.683823
87    NA    NA  490.1981630      1  NA      NA 29.683823
88    NA    NA  541.1067830      1  NA      NA 29.683823
89    NA    NA  597.3024232      1  NA      NA 29.683823
90    NA    NA  659.3341572      1  NA      NA 29.683823
91    NA    NA  727.8080818      1  NA      NA 29.683823
92    NA    NA  803.3932387      1  NA      NA 29.683823
93    NA    NA  886.8281517      1  NA      NA 29.683823
94    NA    NA  978.9280426      1  NA      NA 29.683823
95    NA    NA 1080.5927968      1  NA      NA 29.683823
96    NA    NA 1192.8157551      1  NA      NA 29.683823
97    NA    NA 1316.6934205      1  NA      NA 29.683823
98    NA    NA 1453.4361707      1  NA      NA 29.683823
99    NA    NA 1604.3800854      1  NA      NA 29.683823
100   NA    NA 1771.0000000      1  NA      NA 29.683823
101   NA    NA    0.1000000      2  NA      NA  9.167934
102   NA    NA    0.1103853      2  NA      NA  9.167934
103   NA    NA    0.1218492      2  NA      NA  9.167934
104   NA    NA    0.1345036      2  NA      NA  9.167934
105   NA    NA    0.1484722      2  NA      NA  9.167934
106   NA    NA    0.1638915      2  NA      NA  9.167934
107   NA    NA    0.1809122      2  NA      NA  9.167934
108   NA    NA    0.1997005      2  NA      NA  9.167934
109   NA    NA    0.2204400      2  NA      NA  9.167934
110   NA    NA    0.2433334      2  NA      NA  9.167934
111   NA    NA    0.2686043      2  NA      NA  9.167934
112   NA    NA    0.2964997      2  NA      NA  9.167934
113   NA    NA    0.3272921      2  NA      NA  9.167934
114   NA    NA    0.3612825      2  NA      NA  9.167934
115   NA    NA    0.3988028      2  NA      NA  9.167934
116   NA    NA    0.4402197      2  NA      NA  9.167934
117   NA    NA    0.4859379      2  NA      NA  9.167934
118   NA    NA    0.5364041      2  NA      NA  9.167934
119   NA    NA    0.5921113      2  NA      NA  9.167934
120   NA    NA    0.6536040      2  NA      NA  9.167934
121   NA    NA    0.7214828      2  NA      NA  9.167934
122   NA    NA    0.7964110      2  NA      NA  9.167934
123   NA    NA    0.8791208      2  NA      NA  9.167934
124   NA    NA    0.9704203      2  NA      NA  9.167934
125   NA    NA    1.0712015      2  NA      NA  9.167934
126   NA    NA    1.1824491      2  NA      NA  9.167934
127   NA    NA    1.3052502      2  NA      NA  9.167934
128   NA    NA    1.4408045      2  NA      NA  9.167934
129   NA    NA    1.5904366      2  NA      NA 10.428882
130   NA    NA    1.7556084      2  NA      NA 10.428882
131   NA    NA    1.9379339      2  NA      NA 10.428882
132   NA    NA    2.1391944      2  NA      NA 10.484065
133   NA    NA    2.3613565      2  NA      NA 10.484065
134   NA    NA    2.6065908      2  NA      NA 14.723795
135   NA    NA    2.8772934      2  NA      NA 14.723795
136   NA    NA    3.1761094      2  NA      NA 15.959812
137   NA    NA    3.5059583      2  NA      NA 16.224111
138   NA    NA    3.8700631      2  NA      NA 16.224111
139   NA    NA    4.2719813      2  NA      NA 16.626122
140   NA    NA    4.7156400      2  NA      NA 16.643741
141   NA    NA    5.2053741      2  NA      NA 16.663846
142   NA    NA    5.7459685      2  NA      NA 16.757942
143   NA    NA    6.3427054      2  NA      NA 16.757942
144   NA    NA    7.0014153      2  NA      NA 16.757942
145   NA    NA    7.7285343      2  NA      NA 17.061307
146   NA    NA    8.5311669      2  NA      NA 17.061307
147   NA    NA    9.4171554      2  NA      NA 17.114256
148   NA    NA   10.3951565      2  NA      NA 17.239329
149   NA    NA   11.4747262      2  NA      NA 18.554618
150   NA    NA   12.6664126      2  NA      NA 19.310278
151   NA    NA   13.9818594      2  NA      NA 19.587296
152   NA    NA   15.4339194      2  NA      NA 21.733466
153   NA    NA   17.0367805      2  NA      NA 22.282012
154   NA    NA   18.8061037      2  NA      NA 23.640438
155   NA    NA   20.7591766      2  NA      NA 23.670278
156   NA    NA   22.9150824      2  NA      NA 23.956525
157   NA    NA   25.2948857      2  NA      NA 24.198335
158   NA    NA   27.9218391      2  NA      NA 24.362590
159   NA    NA   30.8216098      2  NA      NA 26.714680
160   NA    NA   34.0225309      2  NA      NA 26.790739
161   NA    NA   37.5558777      2  NA      NA 27.711281
162   NA    NA   41.4561736      2  NA      NA 27.711281
163   NA    NA   45.7615276      2  NA      NA 28.337999
164   NA    NA   50.5140060      2  NA      NA 28.580568
165   NA    NA   55.7600443      2  NA      NA 28.580568
166   NA    NA   61.5509002      2  NA      NA 28.606487
167   NA    NA   67.9431546      2  NA      NA 29.133397
168   NA    NA   74.9992648      2  NA      NA 29.323569
169   NA    NA   82.7881742      2  NA      NA 29.377291
170   NA    NA   91.3859863      2  NA      NA 29.398434
171   NA    NA  100.8767082      2  NA      NA 29.399269
172   NA    NA  111.3530714      2  NA      NA 29.399269
173   NA    NA  122.9174379      2  NA      NA 29.458148
174   NA    NA  135.6828001      2  NA      NA 29.477506
175   NA    NA  149.7738854      2  NA      NA 29.573336
176   NA    NA  165.3283741      2  NA      NA 29.573336
177   NA    NA  182.4982454      2  NA      NA 29.573336
178   NA    NA  201.4512618      2  NA      NA 29.573336
179   NA    NA  222.3726086      2  NA      NA 29.573336
180   NA    NA  245.4667029      2  NA      NA 29.573336
181   NA    NA  270.9591915      2  NA      NA 29.573336
182   NA    NA  299.0991552      2  NA      NA 29.573336
183   NA    NA  330.1615426      2  NA      NA 29.573336
184   NA    NA  364.4498566      2  NA      NA 29.573336
185   NA    NA  402.2991197      2  NA      NA 29.573336
186   NA    NA  444.0791477      2  NA      NA 29.573336
187   NA    NA  490.1981630      2  NA      NA 29.573336
188   NA    NA  541.1067830      2  NA      NA 29.573336
189   NA    NA  597.3024232      2  NA      NA 29.573336
190   NA    NA  659.3341572      2  NA      NA 29.573336
191   NA    NA  727.8080818      2  NA      NA 29.573336
192   NA    NA  803.3932387      2  NA      NA 29.573336
193   NA    NA  886.8281517      2  NA      NA 29.573336
194   NA    NA  978.9280426      2  NA      NA 29.573336
195   NA    NA 1080.5927968      2  NA      NA 29.573336
196   NA    NA 1192.8157551      2  NA      NA 29.573336
197   NA    NA 1316.6934205      2  NA      NA 29.573336
198   NA    NA 1453.4361707      2  NA      NA 29.573336
199   NA    NA 1604.3800854      2  NA      NA 29.573336
200   NA    NA 1771.0000000      2  NA      NA 29.573336
201   NA    NA    0.1000000      3  NA      NA 10.112622
202   NA    NA    0.1103853      3  NA      NA 10.112622
203   NA    NA    0.1218492      3  NA      NA 10.112622
204   NA    NA    0.1345036      3  NA      NA 10.112622
205   NA    NA    0.1484722      3  NA      NA 10.112622
206   NA    NA    0.1638915      3  NA      NA 10.112622
207   NA    NA    0.1809122      3  NA      NA 10.112622
208   NA    NA    0.1997005      3  NA      NA 10.112622
209   NA    NA    0.2204400      3  NA      NA 10.112622
210   NA    NA    0.2433334      3  NA      NA 10.112622
211   NA    NA    0.2686043      3  NA      NA 10.112622
212   NA    NA    0.2964997      3  NA      NA 10.112622
213   NA    NA    0.3272921      3  NA      NA 10.112622
214   NA    NA    0.3612825      3  NA      NA 10.112622
215   NA    NA    0.3988028      3  NA      NA 10.112622
216   NA    NA    0.4402197      3  NA      NA 10.112622
217   NA    NA    0.4859379      3  NA      NA 10.112622
218   NA    NA    0.5364041      3  NA      NA 10.112622
219   NA    NA    0.5921113      3  NA      NA 10.112622
220   NA    NA    0.6536040      3  NA      NA 10.112622
221   NA    NA    0.7214828      3  NA      NA 10.112622
222   NA    NA    0.7964110      3  NA      NA 10.112622
223   NA    NA    0.8791208      3  NA      NA 10.112622
224   NA    NA    0.9704203      3  NA      NA 10.112622
225   NA    NA    1.0712015      3  NA      NA 10.112622
226   NA    NA    1.1824491      3  NA      NA 10.112622
227   NA    NA    1.3052502      3  NA      NA 10.112622
228   NA    NA    1.4408045      3  NA      NA 10.112622
229   NA    NA    1.5904366      3  NA      NA 11.594290
230   NA    NA    1.7556084      3  NA      NA 11.594290
231   NA    NA    1.9379339      3  NA      NA 11.594290
232   NA    NA    2.1391944      3  NA      NA 11.666625
233   NA    NA    2.3613565      3  NA      NA 11.666625
234   NA    NA    2.6065908      3  NA      NA 16.730515
235   NA    NA    2.8772934      3  NA      NA 16.730515
236   NA    NA    3.1761094      3  NA      NA 18.069010
237   NA    NA    3.5059583      3  NA      NA 18.315577
238   NA    NA    3.8700631      3  NA      NA 18.315577
239   NA    NA    4.2719813      3  NA      NA 18.764906
240   NA    NA    4.7156400      3  NA      NA 18.771247
241   NA    NA    5.2053741      3  NA      NA 18.772272
242   NA    NA    5.7459685      3  NA      NA 18.881026
243   NA    NA    6.3427054      3  NA      NA 18.881026
244   NA    NA    7.0014153      3  NA      NA 18.881026
245   NA    NA    7.7285343      3  NA      NA 19.299718
246   NA    NA    8.5311669      3  NA      NA 19.299718
247   NA    NA    9.4171554      3  NA      NA 19.391513
248   NA    NA   10.3951565      3  NA      NA 19.569216
249   NA    NA   11.4747262      3  NA      NA 21.113427
250   NA    NA   12.6664126      3  NA      NA 22.076105
251   NA    NA   13.9818594      3  NA      NA 22.414190
252   NA    NA   15.4339194      3  NA      NA 24.251388
253   NA    NA   17.0367805      3  NA      NA 24.697859
254   NA    NA   18.8061037      3  NA      NA 25.663287
255   NA    NA   20.7591766      3  NA      NA 25.689034
256   NA    NA   22.9150824      3  NA      NA 25.905849
257   NA    NA   25.2948857      3  NA      NA 26.102714
258   NA    NA   27.9218391      3  NA      NA 26.227871
259   NA    NA   30.8216098      3  NA      NA 27.930820
260   NA    NA   34.0225309      3  NA      NA 27.953703
261   NA    NA   37.5558777      3  NA      NA 28.562051
262   NA    NA   41.4561736      3  NA      NA 28.562051
263   NA    NA   45.7615276      3  NA      NA 29.085419
264   NA    NA   50.5140060      3  NA      NA 29.321208
265   NA    NA   55.7600443      3  NA      NA 29.321208
266   NA    NA   61.5509002      3  NA      NA 29.320469
267   NA    NA   67.9431546      3  NA      NA 29.696444
268   NA    NA   74.9992648      3  NA      NA 29.880596
269   NA    NA   82.7881742      3  NA      NA 29.931594
270   NA    NA   91.3859863      3  NA      NA 29.954917
271   NA    NA  100.8767082      3  NA      NA 29.955752
272   NA    NA  111.3530714      3  NA      NA 29.955752
273   NA    NA  122.9174379      3  NA      NA 30.015219
274   NA    NA  135.6828001      3  NA      NA 30.036409
275   NA    NA  149.7738854      3  NA      NA 30.131526
276   NA    NA  165.3283741      3  NA      NA 30.131526
277   NA    NA  182.4982454      3  NA      NA 30.131526
278   NA    NA  201.4512618      3  NA      NA 30.131526
279   NA    NA  222.3726086      3  NA      NA 30.131526
280   NA    NA  245.4667029      3  NA      NA 30.131526
281   NA    NA  270.9591915      3  NA      NA 30.131526
282   NA    NA  299.0991552      3  NA      NA 30.131526
283   NA    NA  330.1615426      3  NA      NA 30.131526
284   NA    NA  364.4498566      3  NA      NA 30.131526
285   NA    NA  402.2991197      3  NA      NA 30.131526
286   NA    NA  444.0791477      3  NA      NA 30.131526
287   NA    NA  490.1981630      3  NA      NA 30.131526
288   NA    NA  541.1067830      3  NA      NA 30.131526
289   NA    NA  597.3024232      3  NA      NA 30.131526
290   NA    NA  659.3341572      3  NA      NA 30.131526
291   NA    NA  727.8080818      3  NA      NA 30.131526
292   NA    NA  803.3932387      3  NA      NA 30.131526
293   NA    NA  886.8281517      3  NA      NA 30.131526
294   NA    NA  978.9280426      3  NA      NA 30.131526
295   NA    NA 1080.5927968      3  NA      NA 30.131526
296   NA    NA 1192.8157551      3  NA      NA 30.131526
297   NA    NA 1316.6934205      3  NA      NA 30.131526
298   NA    NA 1453.4361707      3  NA      NA 30.131526
299   NA    NA 1604.3800854      3  NA      NA 30.131526
300   NA    NA 1771.0000000      3  NA      NA 30.131526
301   NA    NA    0.1000000      4  NA      NA  8.428925
302   NA    NA    0.1103853      4  NA      NA  8.428925
303   NA    NA    0.1218492      4  NA      NA  8.428925
304   NA    NA    0.1345036      4  NA      NA  8.428925
305   NA    NA    0.1484722      4  NA      NA  8.428925
306   NA    NA    0.1638915      4  NA      NA  8.428925
307   NA    NA    0.1809122      4  NA      NA  8.428925
308   NA    NA    0.1997005      4  NA      NA  8.428925
309   NA    NA    0.2204400      4  NA      NA  8.428925
310   NA    NA    0.2433334      4  NA      NA  8.428925
311   NA    NA    0.2686043      4  NA      NA  8.428925
312   NA    NA    0.2964997      4  NA      NA  8.428925
313   NA    NA    0.3272921      4  NA      NA  8.428925
314   NA    NA    0.3612825      4  NA      NA  8.428925
315   NA    NA    0.3988028      4  NA      NA  8.428925
316   NA    NA    0.4402197      4  NA      NA  8.428925
317   NA    NA    0.4859379      4  NA      NA  8.428925
318   NA    NA    0.5364041      4  NA      NA  8.428925
319   NA    NA    0.5921113      4  NA      NA  8.428925
320   NA    NA    0.6536040      4  NA      NA  8.428925
321   NA    NA    0.7214828      4  NA      NA  8.428925
322   NA    NA    0.7964110      4  NA      NA  8.428925
323   NA    NA    0.8791208      4  NA      NA  8.428925
324   NA    NA    0.9704203      4  NA      NA  8.428925
325   NA    NA    1.0712015      4  NA      NA  8.428925
326   NA    NA    1.1824491      4  NA      NA  8.428925
327   NA    NA    1.3052502      4  NA      NA  8.428925
328   NA    NA    1.4408045      4  NA      NA  8.428925
329   NA    NA    1.5904366      4  NA      NA 10.026569
330   NA    NA    1.7556084      4  NA      NA 10.026569
331   NA    NA    1.9379339      4  NA      NA 10.026569
332   NA    NA    2.1391944      4  NA      NA 10.097620
333   NA    NA    2.3613565      4  NA      NA 10.097620
334   NA    NA    2.6065908      4  NA      NA 15.223820
335   NA    NA    2.8772934      4  NA      NA 15.223820
336   NA    NA    3.1761094      4  NA      NA 16.663703
337   NA    NA    3.5059583      4  NA      NA 16.986115
338   NA    NA    3.8700631      4  NA      NA 16.986115
339   NA    NA    4.2719813      4  NA      NA 17.538289
340   NA    NA    4.7156400      4  NA      NA 17.576236
341   NA    NA    5.2053741      4  NA      NA 17.624137
342   NA    NA    5.7459685      4  NA      NA 17.856198
343   NA    NA    6.3427054      4  NA      NA 17.856198
344   NA    NA    7.0014153      4  NA      NA 17.860112
345   NA    NA    7.7285343      4  NA      NA 18.384132
346   NA    NA    8.5311669      4  NA      NA 18.384132
347   NA    NA    9.4171554      4  NA      NA 18.506240
348   NA    NA   10.3951565      4  NA      NA 18.793446
349   NA    NA   11.4747262      4  NA      NA 20.196619
350   NA    NA   12.6664126      4  NA      NA 21.231373
351   NA    NA   13.9818594      4  NA      NA 21.545542
352   NA    NA   15.4339194      4  NA      NA 22.932070
353   NA    NA   17.0367805      4  NA      NA 23.278689
354   NA    NA   18.8061037      4  NA      NA 24.202492
355   NA    NA   20.7591766      4  NA      NA 24.221923
356   NA    NA   22.9150824      4  NA      NA 24.418336
357   NA    NA   25.2948857      4  NA      NA 24.600524
358   NA    NA   27.9218391      4  NA      NA 24.721067
359   NA    NA   30.8216098      4  NA      NA 26.393707
360   NA    NA   34.0225309      4  NA      NA 26.424651
361   NA    NA   37.5558777      4  NA      NA 27.052639
362   NA    NA   41.4561736      4  NA      NA 27.052639
363   NA    NA   45.7615276      4  NA      NA 27.598854
364   NA    NA   50.5140060      4  NA      NA 27.832015
365   NA    NA   55.7600443      4  NA      NA 27.832015
366   NA    NA   61.5509002      4  NA      NA 27.841674
367   NA    NA   67.9431546      4  NA      NA 28.207443
368   NA    NA   74.9992648      4  NA      NA 28.378639
369   NA    NA   82.7881742      4  NA      NA 28.430997
370   NA    NA   91.3859863      4  NA      NA 28.460565
371   NA    NA  100.8767082      4  NA      NA 28.461399
372   NA    NA  111.3530714      4  NA      NA 28.461399
373   NA    NA  122.9174379      4  NA      NA 28.522180
374   NA    NA  135.6828001      4  NA      NA 28.544542
375   NA    NA  149.7738854      4  NA      NA 28.649531
376   NA    NA  165.3283741      4  NA      NA 28.649531
377   NA    NA  182.4982454      4  NA      NA 28.649531
378   NA    NA  201.4512618      4  NA      NA 28.649531
379   NA    NA  222.3726086      4  NA      NA 28.649531
380   NA    NA  245.4667029      4  NA      NA 28.649531
381   NA    NA  270.9591915      4  NA      NA 28.649531
382   NA    NA  299.0991552      4  NA      NA 28.649531
383   NA    NA  330.1615426      4  NA      NA 28.649531
384   NA    NA  364.4498566      4  NA      NA 28.649531
385   NA    NA  402.2991197      4  NA      NA 28.649531
386   NA    NA  444.0791477      4  NA      NA 28.649531
387   NA    NA  490.1981630      4  NA      NA 28.649531
388   NA    NA  541.1067830      4  NA      NA 28.649531
389   NA    NA  597.3024232      4  NA      NA 28.649531
390   NA    NA  659.3341572      4  NA      NA 28.649531
391   NA    NA  727.8080818      4  NA      NA 28.649531
392   NA    NA  803.3932387      4  NA      NA 28.649531
393   NA    NA  886.8281517      4  NA      NA 28.649531
394   NA    NA  978.9280426      4  NA      NA 28.649531
395   NA    NA 1080.5927968      4  NA      NA 28.649531
396   NA    NA 1192.8157551      4  NA      NA 28.649531
397   NA    NA 1316.6934205      4  NA      NA 28.649531
398   NA    NA 1453.4361707      4  NA      NA 28.649531
399   NA    NA 1604.3800854      4  NA      NA 28.649531
400   NA    NA 1771.0000000      4  NA      NA 28.649531
401   NA    NA    0.1000000      5  NA      NA  6.999531
402   NA    NA    0.1103853      5  NA      NA  6.999531
403   NA    NA    0.1218492      5  NA      NA  6.999531
404   NA    NA    0.1345036      5  NA      NA  6.999531
405   NA    NA    0.1484722      5  NA      NA  6.999531
406   NA    NA    0.1638915      5  NA      NA  6.999531
407   NA    NA    0.1809122      5  NA      NA  6.999531
408   NA    NA    0.1997005      5  NA      NA  6.999531
409   NA    NA    0.2204400      5  NA      NA  6.999531
410   NA    NA    0.2433334      5  NA      NA  6.999531
411   NA    NA    0.2686043      5  NA      NA  6.999531
412   NA    NA    0.2964997      5  NA      NA  6.999531
413   NA    NA    0.3272921      5  NA      NA  6.999531
414   NA    NA    0.3612825      5  NA      NA  6.999531
415   NA    NA    0.3988028      5  NA      NA  6.999531
416   NA    NA    0.4402197      5  NA      NA  6.999531
417   NA    NA    0.4859379      5  NA      NA  6.999531
418   NA    NA    0.5364041      5  NA      NA  6.999531
419   NA    NA    0.5921113      5  NA      NA  6.999531
420   NA    NA    0.6536040      5  NA      NA  6.999531
421   NA    NA    0.7214828      5  NA      NA  6.999531
422   NA    NA    0.7964110      5  NA      NA  6.999531
423   NA    NA    0.8791208      5  NA      NA  6.999531
424   NA    NA    0.9704203      5  NA      NA  6.999531
425   NA    NA    1.0712015      5  NA      NA  6.999531
426   NA    NA    1.1824491      5  NA      NA  6.999531
427   NA    NA    1.3052502      5  NA      NA  6.999531
428   NA    NA    1.4408045      5  NA      NA  6.999531
429   NA    NA    1.5904366      5  NA      NA  8.242485
430   NA    NA    1.7556084      5  NA      NA  8.242485
431   NA    NA    1.9379339      5  NA      NA  8.242485
432   NA    NA    2.1391944      5  NA      NA  8.299944
433   NA    NA    2.3613565      5  NA      NA  8.299944
434   NA    NA    2.6065908      5  NA      NA 12.891059
435   NA    NA    2.8772934      5  NA      NA 12.891059
436   NA    NA    3.1761094      5  NA      NA 14.190581
437   NA    NA    3.5059583      5  NA      NA 14.480571
438   NA    NA    3.8700631      5  NA      NA 14.480571
439   NA    NA    4.2719813      5  NA      NA 14.958219
440   NA    NA    4.7156400      5  NA      NA 14.994170
441   NA    NA    5.2053741      5  NA      NA 14.998897
442   NA    NA    5.7459685      5  NA      NA 15.185881
443   NA    NA    6.3427054      5  NA      NA 15.185881
444   NA    NA    7.0014153      5  NA      NA 15.188343
445   NA    NA    7.7285343      5  NA      NA 15.532640
446   NA    NA    8.5311669      5  NA      NA 15.532640
447   NA    NA    9.4171554      5  NA      NA 15.604671
448   NA    NA   10.3951565      5  NA      NA 15.763563
449   NA    NA   11.4747262      5  NA      NA 17.104393
450   NA    NA   12.6664126      5  NA      NA 17.750604
451   NA    NA   13.9818594      5  NA      NA 17.984302
452   NA    NA   15.4339194      5  NA      NA 19.399744
453   NA    NA   17.0367805      5  NA      NA 19.744986
454   NA    NA   18.8061037      5  NA      NA 21.325005
455   NA    NA   20.7591766      5  NA      NA 21.340849
456   NA    NA   22.9150824      5  NA      NA 21.592203
457   NA    NA   25.2948857      5  NA      NA 21.856382
458   NA    NA   27.9218391      5  NA      NA 22.009137
459   NA    NA   30.8216098      5  NA      NA 24.054173
460   NA    NA   34.0225309      5  NA      NA 24.101170
461   NA    NA   37.5558777      5  NA      NA 24.736093
462   NA    NA   41.4561736      5  NA      NA 24.736093
463   NA    NA   45.7615276      5  NA      NA 25.218335
464   NA    NA   50.5140060      5  NA      NA 25.420046
465   NA    NA   55.7600443      5  NA      NA 25.420046
466   NA    NA   61.5509002      5  NA      NA 25.433817
467   NA    NA   67.9431546      5  NA      NA 25.809951
468   NA    NA   74.9992648      5  NA      NA 25.974738
469   NA    NA   82.7881742      5  NA      NA 26.027097
470   NA    NA   91.3859863      5  NA      NA 26.045023
471   NA    NA  100.8767082      5  NA      NA 26.045858
472   NA    NA  111.3530714      5  NA      NA 26.045858
473   NA    NA  122.9174379      5  NA      NA 26.100690
474   NA    NA  135.6828001      5  NA      NA 26.112327
475   NA    NA  149.7738854      5  NA      NA 26.194113
476   NA    NA  165.3283741      5  NA      NA 26.194113
477   NA    NA  182.4982454      5  NA      NA 26.194113
478   NA    NA  201.4512618      5  NA      NA 26.194113
479   NA    NA  222.3726086      5  NA      NA 26.194113
480   NA    NA  245.4667029      5  NA      NA 26.194113
481   NA    NA  270.9591915      5  NA      NA 26.194113
482   NA    NA  299.0991552      5  NA      NA 26.194113
483   NA    NA  330.1615426      5  NA      NA 26.194113
484   NA    NA  364.4498566      5  NA      NA 26.194113
485   NA    NA  402.2991197      5  NA      NA 26.194113
486   NA    NA  444.0791477      5  NA      NA 26.194113
487   NA    NA  490.1981630      5  NA      NA 26.194113
488   NA    NA  541.1067830      5  NA      NA 26.194113
489   NA    NA  597.3024232      5  NA      NA 26.194113
490   NA    NA  659.3341572      5  NA      NA 26.194113
491   NA    NA  727.8080818      5  NA      NA 26.194113
492   NA    NA  803.3932387      5  NA      NA 26.194113
493   NA    NA  886.8281517      5  NA      NA 26.194113
494   NA    NA  978.9280426      5  NA      NA 26.194113
495   NA    NA 1080.5927968      5  NA      NA 26.194113
496   NA    NA 1192.8157551      5  NA      NA 26.194113
497   NA    NA 1316.6934205      5  NA      NA 26.194113
498   NA    NA 1453.4361707      5  NA      NA 26.194113
499   NA    NA 1604.3800854      5  NA      NA 26.194113
500   NA    NA 1771.0000000      5  NA      NA 26.194113
loyn.fit <-
  loyn.boot |>
  dplyr::select(Boot, fit) |>
  unnest(fit) |>
  group_by(AREA, fGRAZE) |>
  ggdist::median_hdci(fit)

g1 <- loyn.fit |> ggplot(aes(y = fit, x = AREA, fill = fGRAZE, color = fGRAZE)) +
  geom_ribbon(aes(ymin = .lower, ymax = .upper), alpha = 0.3, color = NA) +
  geom_line() +
  scale_fill_viridis_d() +
  scale_colour_viridis_d() +
  scale_x_log10() +
  theme_classic()

loyn.infl <-
  loyn.boot |>
  dplyr::select(Boot, infl) |>
  unnest(infl) |>
  group_by(var) |>
  ggdist::median_hdci(rel.inf)


g2 <- loyn.inf |>
  arrange(rel.inf) |>
  mutate(var = factor(var, levels = unique(var))) |>
  ggplot(aes(y = var, x = rel.inf)) +
  geom_vline(xintercept = 12.5, linetype = "dashed") +
  geom_pointrange(aes(xmin = .lower, xmax = .upper)) +
  theme_classic()

g2 + patchwork::inset_element(g1, left = 0.3, bottom = 0.01, right = 1, top = 0.7)

g1 + patchwork::inset_element(g2, left = 0.5, bottom = 0.01, right = 1, top = 0.4)

library(randomForest)
loyn.rf <- randomForest(ABUND ~ DIST + LDIST + AREA + fGRAZE + ALT + YR.ISOL,
  data = loyn, importance = TRUE,
  ntree = 1000
)
loyn.imp <- randomForest::importance(loyn.rf)


## Rank by either:
## *MSE (mean decrease in accuracy)
## For each tree, calculate OOB prediction error.
## This also done after permuting predictors.
## Then average diff of prediction errors for each tree
## *NodePurity (mean decrease in node impurity)
## Measure of the total decline of impurity due to each
## predictor averaged over trees
100 * loyn.imp / sum(loyn.imp)
             %IncMSE IncNodePurity
DIST    -0.081225849      6.437963
LDIST   -0.001620151      7.087230
AREA     0.602557933     36.341377
fGRAZE   0.391291859     22.330945
ALT      0.109146005      9.226453
YR.ISOL  0.252253722     17.303628
varImpPlot(loyn.rf)

## use brute force
loyn.rf |>
  pdp::partial("AREA") |>
  autoplot()

## loyn.rf.acc <- env %>%
##     bind_cols(Pred = predict(loyn.rf,
##                              newdata=env))

## with(loyn.rf.acc,  cor(loyn, Pred))


## loyn.rf.acc %>%
##   ggplot() +
##   geom_point(aes(y=Pred,  x=loyn)) +
##   geom_point(data = loyn.acc, aes(y=Pred,  x=loyn), colour = 'red')

11 References

Loyn, R. H. 1987. “Nature Conservation: The Role of Remnants of Native Vegetation.” In, edited by D. A. Saunders, G. W. Arnold, A. A. Burbridge, and A. J. M. Hopkins. Chipping Norton, NSW: Surrey Beatty & Sons.